{-# LANGUAGE FlexibleContexts  #-}
{-# LANGUAGE OverloadedStrings #-}

{- |
   Module      : Text.Pandoc.Readers.TikiWiki
   Copyright   : Copyright (C) 2017 Robin Lee Powell
   License     : GNU GPL, version 2 or above

   Maintainer  : Robin Lee Powell <robinleepowell@gmail.com>
   Stability   : alpha
   Portability : portable

Conversion of TikiWiki text to 'Pandoc' document.
-}

module Text.Pandoc.Readers.TikiWiki ( readTikiWiki
                                    ) where

import Control.Monad
import Control.Monad.Except (throwError)
import qualified Data.Foldable as F
import Data.List (dropWhileEnd)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getVerbosity)
import Text.Pandoc.Definition
import Text.Pandoc.Logging (Verbosity (..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed)
import Text.Pandoc.Shared (safeRead)
import Text.Pandoc.XML (fromEntities)
import Text.Printf (printf)

-- | Read TikiWiki from an input string and return a Pandoc document.
readTikiWiki :: (PandocMonad m, ToSources a)
          => ReaderOptions
          -> a
          -> m Pandoc
readTikiWiki opts s = do
  let sources = ensureFinalNewlines 2 (toSources s)
  res <- readWithM parseTikiWiki def{ stateOptions = opts } sources
  case res of
       Left e  -> throwError e
       Right d -> return d

type TikiWikiParser = ParsecT Sources ParserState

--
-- utility functions
--

tryMsg :: Text -> TikiWikiParser m a -> TikiWikiParser m a
tryMsg msg p = try p <?> T.unpack msg

skip :: TikiWikiParser m a -> TikiWikiParser m ()
skip parser = Control.Monad.void parser

--
-- main parser
--

parseTikiWiki :: PandocMonad m => TikiWikiParser m Pandoc
parseTikiWiki = do
  bs <- mconcat <$> many block
  spaces
  eof
  return $ B.doc bs

block :: PandocMonad m => TikiWikiParser m B.Blocks
block = do
  verbosity <- getVerbosity
  pos <- getPosition
  res <- mempty <$ skipMany1 blankline
         <|> blockElements
         <|> para
  skipMany blankline
  when (verbosity >= INFO) $
    trace (T.pack $ printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res))
  return res

blockElements :: PandocMonad m => TikiWikiParser m B.Blocks
blockElements = choice [ table
                       , hr
                       , header
                       , mixedList
                       , definitionList
                       , codeMacro
                       ]

-- top
-- ----
-- bottom
--
-- ----
--
hr :: PandocMonad m => TikiWikiParser m B.Blocks
hr = try $ do
  string "----"
  many (char '-')
  newline
  return B.horizontalRule

-- ! header
--
-- !! header level two
--
-- !!! header level 3
--
header :: PandocMonad m => TikiWikiParser m B.Blocks
header = tryMsg "header" $ do
  level <- fmap length (many1 (char '!'))
  guard $ level <= 6
  skipSpaces
  content <- B.trimInlines . mconcat <$> manyTill inline newline
  attr <- registerHeader nullAttr content
  return $ B.headerWith attr level content

tableRow :: PandocMonad m => TikiWikiParser m [B.Blocks]
tableRow = try $ do
--  row <- sepBy1 (many1Till inline $ oneOf "\n|") (try $ string "|" <* notFollowedBy (oneOf "|\n"))
--  return $ map (B.plain . mconcat) row
  row <- sepBy1 (many1 (noneOf "\n|") >>= parseColumn . T.pack) (try $ string "|" <* notFollowedBy (oneOf "|\n"))
  return $ map B.plain row
  where
    parseColumn x = do
      parsed <- parseFromString (many1 inline) x
      return $ mconcat parsed



-- Tables:
--
-- ||foo||
--
-- ||row1-column1|row1-column2||row2-column1|row2-column2||
--
-- ||row1-column1|row1-column2
-- row2-column1|row2-column2||
--
-- ||row1-column1|row1-column2
-- row2-column1|row2-column2||row3-column1|row3-column2||
--
-- || Orange | Apple     | more
--  Bread  | Pie       | more
--  Butter | Ice cream | and more ||
--
table :: PandocMonad m => TikiWikiParser m B.Blocks
table = try $ do
  string "||"
  rows <- sepBy1 tableRow (try $ string "\n" <|> (string "||" <* notFollowedBy (string "\n")))
  string "||"
  newline
  -- return $ B.simpleTable (headers rows) $ trace ("rows: " ++ (show rows)) rows
  return $ B.simpleTable (headers rows) rows
  where
    -- The headers are as many empty strings as the number of columns
-- in the first row
    headers [] = []
    headers (r:_) = replicate (length r) ((B.plain . B.str) "")

para :: PandocMonad m => TikiWikiParser m B.Blocks
para =  fmap (result . mconcat) ( many1Till inline endOfParaElement)
 where
   endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
   endOfInput       = try $ skipMany blankline >> skipSpaces >> eof
   endOfPara        = try $ blankline >> skipMany1 blankline
   newBlockElement  = try $ blankline >> skip blockElements
   result content   = if F.all (==Space) content
                      then mempty
                      else B.para $ B.trimInlines content

-- ;item 1: definition 1
-- ;item 2: definition 2-1
-- + definition 2-2
-- ;item ''3'': definition ''3''
--
definitionList :: PandocMonad m => TikiWikiParser m B.Blocks
definitionList = tryMsg "definitionList" $ do
  elements <-many1 parseDefinitionListItem
  return $ B.definitionList elements
  where
    parseDefinitionListItem :: PandocMonad m => TikiWikiParser m (B.Inlines, [B.Blocks])
    parseDefinitionListItem = do
      skipSpaces >> char ';' <* skipSpaces
      term <- many1Till inline $ char ':' <* skipSpaces
      line <- listItemLine 1
      return (mconcat term, [B.plain line])

data ListType = None | Numbered | Bullet deriving (Ord, Eq, Show)

data ListNesting = LN { lntype :: ListType, lnnest :: Int } deriving (Ord, Eq, Show)

-- The first argument is a stack (most recent == head) of our list
-- nesting status; the list type and the nesting level; if we're in
-- a number list in a bullet list it'd be
-- [LN Numbered 2, LN Bullet 1]
--
-- Mixed list example:
--
-- # one
-- # two
-- ** two point one
-- ** two point two
-- # three
-- # four
--
mixedList :: PandocMonad m => TikiWikiParser m B.Blocks
mixedList = try $ do
  items <- try $ many1 listItem
  return $ mconcat $ fixListNesting $ spanFoldUpList (LN None 0) items

-- See the "Handling Lists" section of DESIGN-CODE for why this
-- function exists.  It's to post-process the lists and do some
-- mappends.
--
-- We need to walk the tree two items at a time, so we can see what
-- we're going to join *to* before we get there.
--
-- Because of that, it seemed easier to do it by hand than to try to
-- figre out a fold or something.
fixListNesting :: [B.Blocks] -> [B.Blocks]
fixListNesting [] = []
fixListNesting [first] = [recurseOnList first]
-- fixListNesting nestall | trace ("\n\nfixListNesting: " ++ (show nestall)) False = undefined
-- fixListNesting nestall@(first:second:rest) =
fixListNesting (first:second:rest) =
    case B.toList second of
      (BulletList _ : _) -> fixListNesting $ mappend (recurseOnList first) (recurseOnList second) : rest
      (OrderedList _ _ : _) -> fixListNesting $ mappend (recurseOnList first) (recurseOnList second) : rest
      _ -> recurseOnList first : fixListNesting (second:rest)

-- This function walks the Block structure for fixListNesting,
-- because it's a bit complicated, what with converting to and from
-- lists and so on.
recurseOnList :: B.Blocks -> B.Blocks
-- recurseOnList item | trace ("rOL: " ++ (show $ length $ B.toList item) ++ ", " ++ (show $ B.toList item)) False = undefined
recurseOnList items
  | length (B.toList items) == 1 =
      case B.toList items of
        (BulletList listItems : _) -> B.bulletList $ fixListNesting $ map B.fromList listItems
        (OrderedList _ listItems : _) -> B.orderedList $ fixListNesting $ map B.fromList listItems
        _ -> items

  -- The otherwise works because we constructed the blocks, and we
  -- know for a fact that no mappends have been run on them; each
  -- Blocks consists of exactly one Block.
  --
  -- Anything that's not like that has already been processed by
  -- fixListNesting; don't bother to process it again.
  | otherwise = items


-- Turn the list if list items into a tree by breaking off the first
-- item, splitting the remainder of the list into items that are in
-- the tree of the first item and those that aren't, wrapping the
-- tree of the first item in its list time, and recursing on both
-- sections.
spanFoldUpList :: ListNesting -> [(ListNesting, B.Blocks)] -> [B.Blocks]
spanFoldUpList _ [] = []
spanFoldUpList ln [first] =
  listWrap ln (fst first) [snd first]
spanFoldUpList ln (first:rest) =
  let (span1, span2) = span (splitListNesting (fst first)) rest
      newTree1 = listWrap ln (fst first) $ snd first : spanFoldUpList (fst first) span1
      newTree2 = spanFoldUpList ln span2
  in
    newTree1 ++ newTree2

-- Decide if the second item should be in the tree of the first
-- item, which is true if the second item is at a deeper nesting
-- level and of the same type.
splitListNesting :: ListNesting -> (ListNesting, B.Blocks) -> Bool
splitListNesting ln1 (ln2, _)
  | lnnest ln1 < lnnest ln2 =
  True
  | ln1 == ln2 =
  True
  | otherwise =
  False

-- If we've moved to a deeper nesting level, wrap the new level in
-- the appropriate type of list.
listWrap :: ListNesting -> ListNesting -> [B.Blocks] -> [B.Blocks]
listWrap upperLN curLN retTree =
  if upperLN == curLN then
    retTree
  else
    case lntype curLN of
      None     -> []
      Bullet   -> [B.bulletList retTree]
      Numbered -> [B.orderedList retTree]

listItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks)
listItem = choice [
    bulletItem
  , numberedItem
  ]


-- * Start each line
-- * with an asterisk (*).
-- ** More asterisks gives deeper
-- *** and deeper levels.
--
bulletItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks)
bulletItem = try $ do
  prefix <- many1 $ char '*'
  many $ char ' '
  content <- listItemLine (length prefix)
  return (LN Bullet (length prefix), B.plain content)

-- # Start each line
-- # with a number (1.).
-- ## More number signs gives deeper
-- ### and deeper
--
numberedItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks)
numberedItem = try $ do
  prefix <- many1 $ char '#'
  many $ char ' '
  content <- listItemLine (length prefix)
  return (LN Numbered (length prefix), B.plain content)

listItemLine :: PandocMonad m => Int -> TikiWikiParser m B.Inlines
listItemLine nest = lineContent >>= parseContent
  where
    lineContent = do
      content <- anyLine
      continuation <- optionMaybe listContinuation
      return $ filterSpaces content <> "\n" <> Data.Maybe.fromMaybe "" continuation
    filterSpaces = T.dropWhileEnd (== ' ')
    listContinuation = string (replicate nest '+') >> lineContent
    parseContent x = do
      parsed <- parseFromString (many1 inline) x
      return $ mconcat $ dropWhileEnd (== B.space) parsed

-- Turn the CODE macro attributes into Pandoc code block attributes.
mungeAttrs :: [(Text, Text)] -> (Text, [Text], [(Text, Text)])
mungeAttrs rawAttrs = ("", classes, rawAttrs)
  where
    -- "colors" is TikiWiki CODE macro for "name of language to do
    -- highlighting for"; turn the value into a class
    color = fromMaybe "" $ lookup "colors" rawAttrs
    -- ln = 1 means line numbering.  It's also the default.  So we
    -- emit numberLines as a class unless ln = 0
    lnRaw = fromMaybe "1" $ lookup "ln" rawAttrs
    ln = if lnRaw == "0" then
            ""
         else
            "numberLines"
    classes = filter (/= "") [color, ln]

codeMacro :: PandocMonad m => TikiWikiParser m B.Blocks
codeMacro = try $ do
  string "{CODE("
  rawAttrs <- macroAttrs
  string ")}"
  body <- T.pack <$> manyTill anyChar (try (string "{CODE}"))
  newline
  if not (null rawAttrs)
    then
      return $ B.codeBlockWith (mungeAttrs rawAttrs) body
    else
      return $ B.codeBlock body


--
-- inline parsers
--

inline :: PandocMonad m => TikiWikiParser m B.Inlines
inline = choice [ whitespace
                , noparse
                , strong
                , emph
                , nbsp
                , image
                , htmlComment
                , strikeout
                , code
                , wikiLink
                , notExternalLink
                , externalLink
                , superTag
                , superMacro
                , subTag
                , subMacro
                , escapedChar
                , colored
                , centered
                , underlined
                , boxed
                , breakChars
                , str
                , symbol
                ] <?> "inline"

whitespace :: PandocMonad m => TikiWikiParser m B.Inlines
whitespace = lb <|> regsp
  where lb = try $ skipMany spaceChar >> linebreak >> return B.space
        regsp = try $ skipMany1 spaceChar >> return B.space

-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
-- for this
nbsp :: PandocMonad m => TikiWikiParser m B.Inlines
nbsp = try $ do
  string "~hs~"
  return $ B.str " NOT SUPPORTED BEGIN: ~hs~ (non-breaking space) :END "

-- UNSUPPORTED, as the desired behaviour (that the data be
-- *retained* and stored as a comment) doesn't exist in calibre, and
-- silently throwing data out seemed bad.
htmlComment :: PandocMonad m => TikiWikiParser m B.Inlines
htmlComment = try $ do
  string "~hc~"
  inner <- fmap T.pack $ many1 $ noneOf "~"
  string "~/hc~"
  return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " <> inner <> " ~/hc~ :END "

linebreak :: PandocMonad m => TikiWikiParser m B.Inlines
linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
  where lastNewline  = eof >> return mempty
        innerNewline = return B.space

between :: (Monoid c, PandocMonad m, Show b) => TikiWikiParser m a -> TikiWikiParser m b -> (TikiWikiParser m b -> TikiWikiParser m c) -> TikiWikiParser m c
between start end p =
  mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end)

enclosed :: (Monoid b, PandocMonad m, Show a) => TikiWikiParser m a -> (TikiWikiParser m a -> TikiWikiParser m b) -> TikiWikiParser m b
enclosed sep p = between sep (try $ sep <* endMarker) p
  where
    endMarker   = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|'_") <|> eof
    endSpace    = (spaceChar <|> newline) >> return B.space


nestedInlines :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m B.Inlines
nestedInlines end = innerSpace <|> nestedInline
  where
    innerSpace   = try $ whitespace <* notFollowedBy end
    nestedInline = notFollowedBy whitespace >> inline

-- {img attId="39" imalign="right" link="http://info.tikiwiki.org" alt="Panama Hat"}
--
-- {img attId="37", thumb="mouseover", styleimage="border", desc="150"}
--
-- {img src="img/wiki_up/393px-Pears.jpg" thumb="y" imalign="center" stylebox="border" button="y" desc="Pretty pears" max="200" rel="box"}
--
image :: PandocMonad m => TikiWikiParser m B.Inlines
image = try $ do
  string "{img "
  rawAttrs <- sepEndBy1 imageAttr spaces
  string "}"
  let src = fromMaybe "" $ lookup "src" rawAttrs
  let title = fromMaybe src $ lookup "desc" rawAttrs
  let alt = fromMaybe title $ lookup "alt" rawAttrs
  let classes = map fst $ filter (\(_,b) -> b == "" || b == "y") rawAttrs
  if not (T.null src)
    then
      return $ B.imageWith ("", classes, rawAttrs) src title (B.str alt)
    else
      return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " <> printAttrs rawAttrs <> "} :END "
  where
    printAttrs attrs = T.unwords $ map (\(a, b) -> a <> "=\"" <> b <> "\"") attrs

imageAttr :: PandocMonad m => TikiWikiParser m (Text, Text)
imageAttr = try $ do
  key <- many1 (noneOf "=} \t\n")
  char '='
  optional $ char '"'
  value <- many1 (noneOf "}\"\n")
  optional $ char '"'
  optional $ char ','
  return (T.pack key, T.pack value)


-- __strong__
strong :: PandocMonad m => TikiWikiParser m B.Inlines
strong = try $ fmap B.strong (enclosed (string "__") nestedInlines)

-- ''emph''
emph :: PandocMonad m => TikiWikiParser m B.Inlines
emph = try $ fmap B.emph (enclosed (string "''") nestedInlines)

-- ~246~
escapedChar :: PandocMonad m => TikiWikiParser m B.Inlines
escapedChar = try $ do
  string "~"
  mNumber <- safeRead . T.pack <$> many1 digit
  string "~"
  return $ B.str $
    case mNumber of
      Just number -> T.singleton $ toEnum (number :: Int)
      Nothing     -> ""

-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
-- for this
centered :: PandocMonad m => TikiWikiParser m B.Inlines
centered = try $ do
  string "::"
  inner <- fmap T.pack $ many1 $ noneOf ":\n"
  string "::"
  return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" <> inner <> ":: :END "

-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
-- for this
colored :: PandocMonad m => TikiWikiParser m B.Inlines
colored = try $ do
  string "~~"
  inner <- fmap T.pack $ many1 $ noneOf "~\n"
  string "~~"
  return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" <> inner <> "~~ :END "

-- ===underlined===
underlined :: PandocMonad m => TikiWikiParser m B.Inlines
underlined = try $ B.underline <$> enclosed (string "===") nestedInlines

-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
-- for this
boxed :: PandocMonad m => TikiWikiParser m B.Inlines
boxed = try $ do
  string "^"
  inner <- fmap T.pack $ many1 $ noneOf "^\n"
  string "^"
  return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" <> inner <> "^ :END "

-- --text--
strikeout :: PandocMonad m => TikiWikiParser m B.Inlines
strikeout = try $ fmap B.strikeout (enclosed (string "--") nestedInlines)

nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m Text
nestedString end = innerSpace <|> countChar 1 nonspaceChar
  where
    innerSpace = try $ T.pack <$> many1 spaceChar <* notFollowedBy end

breakChars :: PandocMonad m => TikiWikiParser m B.Inlines
breakChars = try $ string "%%%" >> return B.linebreak

-- superscript: foo{TAG(tag=>sup)}super{TAG}foo / bar{SUP()}super2{SUP}bar
superTag :: PandocMonad m => TikiWikiParser m B.Inlines
superTag = try $  fmap (B.superscript . B.text . fromEntities) ( between (string "{TAG(tag=>sup)}") (string "{TAG}") nestedString)

superMacro :: PandocMonad m => TikiWikiParser m B.Inlines
superMacro = try $ do
  string "{SUP("
  manyTill anyChar (string ")}")
  body <- manyTill anyChar (string "{SUP}")
  return $ B.superscript $ B.text $ T.pack body

-- subscript: baz{TAG(tag=>sub)}sub{TAG}qux / qux{SUB()}sub2{SUB}qux
subTag :: PandocMonad m => TikiWikiParser m B.Inlines
subTag = try $  fmap (B.subscript . B.text . fromEntities) ( between (string "{TAG(tag=>sub)}") (string "{TAG}") nestedString)

subMacro :: PandocMonad m => TikiWikiParser m B.Inlines
subMacro = try $ do
  string "{SUB("
  manyTill anyChar (string ")}")
  body <- manyTill anyChar (string "{SUB}")
  return $ B.subscript $ B.text $ T.pack body

-- -+text+-
code :: PandocMonad m => TikiWikiParser m B.Inlines
code = try $  fmap (B.code . fromEntities) ( between (string "-+") (string "+-") nestedString)

macroAttr :: PandocMonad m => TikiWikiParser m (Text, Text)
macroAttr = try $ do
  key <- many1 (noneOf "=)")
  char '='
  optional $ char '"'
  value <- many1 (noneOf " )\"")
  optional $ char '"'
  return (T.pack key, T.pack value)

macroAttrs :: PandocMonad m => TikiWikiParser m [(Text, Text)]
macroAttrs = try $ sepEndBy macroAttr spaces

-- ~np~ __not bold__ ~/np~
noparse :: PandocMonad m => TikiWikiParser m B.Inlines
noparse = try $ do
  string "~np~"
  body <- manyTill anyChar (string "~/np~")
  return $ B.str $ T.pack body

str :: PandocMonad m => TikiWikiParser m B.Inlines
str = fmap B.str (T.pack <$> many1 alphaNum <|> characterReference)

symbol :: PandocMonad m => TikiWikiParser m B.Inlines
symbol = fmap B.str (countChar 1 nonspaceChar)

-- [[not a link]
notExternalLink :: PandocMonad m => TikiWikiParser m B.Inlines
notExternalLink = try $ do
  start <- string "[["
  body <- many (noneOf "\n[]")
  end <- string "]"
  return $ B.text $ T.pack $ start ++ body ++ end

-- [http://www.somesite.org url|Some Site title]
-- ((internal link))
--
-- The ((...)) wiki links and [...] external links are handled
-- exactly the same; this abstracts that out
makeLink :: PandocMonad m => Text -> Text -> Text -> TikiWikiParser m B.Inlines
makeLink start middle end = try $ do
  st <- getState
  guard $ stateAllowLinks st
  setState $ st{ stateAllowLinks = False }
  (url, title, anchor) <- wikiLinkText start middle end
  parsedTitle <- parseFromString (many1 inline) title
  setState $ st{ stateAllowLinks = True }
  return $ B.link (url <> anchor) "" $ mconcat parsedTitle

wikiLinkText :: PandocMonad m => Text -> Text -> Text -> TikiWikiParser m (Text, Text, Text)
wikiLinkText start middle end = do
  string (T.unpack start)
  url <- T.pack <$> many1 (noneOf $ T.unpack middle ++ "\n")
  seg1 <- option url linkContent
  seg2 <- option "" linkContent
  string (T.unpack end)
  if seg2 /= ""
    then
      return (url, seg2, seg1)
    else
      return (url, seg1, "")
  where
    linkContent      = do
      char '|'
      T.pack <$> many (noneOf $ T.unpack middle)

externalLink :: PandocMonad m => TikiWikiParser m B.Inlines
externalLink = makeLink "[" "]|" "]"

-- NB: this wiki linking is unlikely to work for anyone besides me
-- (rlpowell); it happens to work for me because my Hakyll code has
-- post-processing that treats pandoc .md titles as valid link
-- targets, so something like
-- [see also this other post](My Other Page) is perfectly valid.
wikiLink :: PandocMonad m => TikiWikiParser m B.Inlines
wikiLink = makeLink "((" ")|" "))"
